/* Library that holds the intermediate data sets used below (daily_panel_rfs and
   taq_trading). The path is a hard-coded absolute location: edit it to point at
   your own copy of the data before running. */
libname mylib 'g:\Dropbox\Wall Street Bets (Private)\Data';



*This program contains the code needed to create Table 12 of the paper;
*The program references:
1) taq_trading which is constructed in the Daily Taq (Intermediate File) code and
2) Daily_Panel_RFS which is constructed in the Daily Panel RFS (Intermediate File) code;



/* Read the daily panel, build three 0/1 indicator variables, and keep only the
   columns used later in this program. */
data info;
    set mylib.daily_panel_rfs;

    /* a missing post count is replaced by zero before the indicator is built */
    if TOTAL_DD_POSTS = . then TOTAL_DD_POSTS = 0;

    /* A SAS comparison evaluates to 1 (true) or 0 (false); a missing value is
       never greater than 0, so it yields 0 exactly as the ELSE branch would. */
    DD_DUMMY = (TOTAL_DD_POSTS > 0);
    SA_DUMMY = (SA_POSTS > 0);
    NR_DUMMY = (NON_RESEARCH2 > 0);

    keep ticker date cum_month gme_amc_flag post_gme
         ret5 ret21 ret63 week2-week13
         net_dd2 NET_DD2_POST NET_SA2 NET_SA2_POST NON_RESEARCH2 NON_RESEARCH2_POST
         DD_and_SA DD_and_SA_POST DD_and_Non_Research DD_and_Non_Research_post net_dd2_pre
         ln_size ln_bm abn_ret mom5 mom6_26
         BM_MISSING SIZE_MISSING
         news_sentiment lag_sent5 lag_sent6_26 SENTIMENT21
         TOTAL_DD_POSTS DD_DUMMY SA_DUMMY NR_DUMMY;
run;


/* Put both merge inputs in ticker-date order, as required by the BY statement
   of the merge that follows. Each data set is sorted once; the original sorted
   mylib.taq_trading a second time by the same key, which did nothing useful.
   Note that mylib.taq_trading is sorted in place in the permanent library. */
proc sort data=mylib.taq_trading;
    by ticker date;
run;

proc sort data=info;
    by ticker date;
run;
/* Match-merge the panel with the TAQ trading measures by ticker-date, then
   build the FRENZY variable and its interactions with the posting indicators
   and with the post_gme / (1 - post_gme) split. */
data info2;
    merge info mylib.taq_trading;
    by ticker date;

    /* keep only rows that have a non-missing ret5 */
    if ret5 ne .;

    /* scaling by 10 so can interpret estimates as a decile change */
    array _scaled[*] std_retail_trades121 std_retail_VOL121 std_INST_VOL121;
    do _k = 1 to dim(_scaled);
        _scaled[_k] = _scaled[_k] / 10;
    end;

    /* FRENZY is the scaled retail-trades measure */
    frenzy = std_retail_trades121;

    /* complement of post_gme, reused by every "_pre" variable below */
    _pre_gme = 1 - post_gme;

    frenzy_post = post_gme * frenzy;
    frenzy_pre  = _pre_gme * frenzy;

    /* indicator x FRENZY */
    DD_FRENZY = FRENZY * DD_DUMMY;
    SA_FRENZY = FRENZY * SA_DUMMY;
    NR_FRENZY = FRENZY * NR_DUMMY;

    /* indicator x FRENZY x post_gme */
    DD_FRENZY_POST = POST_GME * DD_FRENZY;
    SA_FRENZY_POST = POST_GME * SA_FRENZY;
    NR_FRENZY_POST = POST_GME * NR_FRENZY;

    /* indicator x FRENZY x (1 - post_gme) */
    DD_FRENZY_pre = _pre_gme * DD_FRENZY;
    SA_FRENZY_pre = _pre_gme * SA_FRENZY;
    NR_FRENZY_pre = _pre_gme * NR_FRENZY;

    /* indicator x post_gme */
    dd_dummy_post = post_gme * dd_dummy;
    sa_dummy_post = post_gme * sa_dummy;
    nr_dummy_post = post_gme * nr_dummy;

    /* indicator x (1 - post_gme) */
    dd_dummy_pre = _pre_gme * dd_dummy;
    sa_dummy_pre = _pre_gme * sa_dummy;
    nr_dummy_pre = _pre_gme * nr_dummy;

    /* helper variables are not written to the output data set */
    drop _k _pre_gme;
run;



/* Keep one row per date-ticker and order the data by date so it can be
   processed in BY groups below. */
proc sort data=info2 nodupkey;
    by date ticker;
run;

/* Demean every listed variable within each date (MEAN=0 combined with BY date)
   and write the result to info2_fe. Each variable is listed exactly once: the
   original list repeated frenzy_post, sa_dummy and nr_dummy. The step is closed
   with RUN, the conventional terminator for this procedure. The commented-out
   WHERE statement is the switch referred to by the specification notes that
   follow this step: remove the leading "*" to activate the filter. */
proc standard data=info2 out=info2_fe mean=0;
    by date;
    var ret21
        net_dd2 net_dd2_post
        NET_SA2 NET_SA2_POST
        NON_RESEARCH2 NON_RESEARCH2_POST
        ln_size ln_bm abn_ret mom5 mom6_26
        BM_MISSING SIZE_MISSING
        news_sentiment lag_sent5 lag_sent6_26
        FRENZY FRENZY_POST frenzy_pre
        DD_FRENZY DD_FRENZY_POST dd_frenzy_pre
        SA_FRENZY SA_FRENZY_POST sa_frenzy_pre
        NR_FRENZY NR_FRENZY_POST nr_frenzy_pre
        DD_DUMMY dd_dummy_post dd_dummy_pre
        SA_DUMMY sa_dummy_post sa_dummy_pre
        NR_DUMMY nr_dummy_post nr_dummy_pre;
  *where gme_amc_flag =0;
run;


*code below reports Specification 1 of Table 12;
*to obtain Specification 2, impose the filter "where gme_amc_flag  =0" by removing the "*" in the PROC STANDARD step above;
*to obtain Specification 3, add the following variables to the regression: net_dd2  net_dd2_post  NET_SA2 NET_SA2_POST NON_RESEARCH2 NON_RESEARCH2_POST;
*to obtain Specification 4, impose the filter "where gme_amc_flag  =0" by removing the "*" in the PROC STANDARD step above
and add the following variables to the regression: net_dd2  net_dd2_post  NET_SA2 NET_SA2_POST NON_RESEARCH2 NON_RESEARCH2_POST;



/* Inputs shared by the PROC SURVEYREG steps that follow:
     yourdata - data set the regressions are run on
     firmid   - first clustering variable
     time     - second clustering variable
     y        - dependent variable
     x        - regressors
   Each regressor appears exactly once in x. The original value listed
   SA_FRENZY and SA_FRENZY_POST twice, which puts linearly dependent columns
   in the model. */
%let yourdata=info2_fe;

%let firmid=ticker ;
%let time=cum_month;

%let y=ret21 ;
%let x=   FRENZY FRENZY_POST DD_FRENZY DD_FRENZY_POST SA_FRENZY SA_FRENZY_POST NR_FRENZY NR_FRENZY_POST

ln_size ln_bm  abn_ret mom5 mom6_26  news_sentiment lag_sent5 lag_sent6_26    BM_MISSING SIZE_MISSING  ;

/* Fit &y on &x with PROC SURVEYREG under one clustering scheme and save the
   coefficient covariance matrix through ODS.
     cluster  - variable(s) for the CLUSTER statement
     covb_out - data set that receives the CovB table
     parm_out - optional data set that receives the ParameterEstimates table */
%macro clustered_covb(cluster=, covb_out=, parm_out=);
    proc surveyreg data=&yourdata;
        cluster &cluster;
        model &y = &x / covb;
        ods output covb=&covb_out
        %if %length(&parm_out) > 0 %then %do;
            parameterestimates=&parm_out
        %end;
        ;
    run;
%mend clustered_covb;

/* cluster by the first dimension */
%clustered_covb(cluster=&firmid, covb_out=firm)

/* cluster by the second dimension (e.g., year) */
%clustered_covb(cluster=&time, covb_out=year)

/* cluster by the intersection of the two dimensions (e.g., firm-year);
   this run also supplies the parameter estimates */
%clustered_covb(cluster=&firmid &time, covb_out=both, parm_out=parm)

/* Reduce the parameter-estimates table to the parameter name and its estimate. */
data parm(keep=parameter estimate);
    set parm;
run;

/* parm1 holds a single value n, the number of rows in parm. It gives the
   dimension of the var/cov matrix and is needed to extract the square roots
   of the diagonal later on. Only the last row is written, where the running
   row counter equals the row count. */
data parm1(keep=n);
    set parm end=is_last_row;
    n = _n_;
    if is_last_row;
run;

/* Build the combined var-cov matrix from the three SURVEYREG covariance
   tables and extract the standard errors:
     X = covariance clustered on the first dimension  (data set firm)
     Y = covariance clustered on the second dimension (data set year)
     Z = covariance clustered on the intersection     (data set both)
     B = X + Y - Z
   The standard errors are the square roots of the diagonal of B and are
   written to data set b in a column named stderr. */
proc iml;
    /* load each input and echo it to the output */
    use both;   read all var _num_ into Z;   print Z;
    use firm;   read all var _num_ into X;   print X;
    use year;   read all var _num_ into Y;   print Y;
    use parm1;  read all var _num_ into n;   print n;

    B = X + Y - Z;

    /* Mask B with an n x n identity so only the diagonal survives, sum each
       row against a column of ones to pull the diagonal into an n x 1 vector,
       then take element-wise square roots. */
    G = ((I(n) # B) * J(n, 1, 1)) ## 0.5;

    print B;
    print G;

    create b from G[colname='stderr'];
    append from G;
quit;

/* results = parameter estimates side by side with the standard errors (the two
   data sets are paired row by row, with no BY variable), plus the t-statistic
   estimate / stderr. */
data results;
    merge parm b;
    tstat = estimate / stderr;
run;

proc print data=results;
run;




/* Lay the results out as one column: each estimate followed directly by its
   t-statistic. Estimates get obs = row number and t-statistics get
   obs = row number + .5, so sorting by obs interleaves them.
   spec3b (the t-statistic rows on their own) is still created. */
data spec3  (keep=parameter estimate3 obs)
     spec3b (keep=parameter estimate3 obs);
    set results;

    /* estimate row */
    estimate3 = estimate;
    obs       = _n_;
    output spec3;

    /* t-statistic row, placed half a step after its estimate */
    estimate3 = tstat;
    obs       = _n_ + .5;
    output spec3b;
run;

/* stack the t-statistic rows underneath the estimate rows */
proc append base=spec3 data=spec3b;
run;

proc sort data=spec3;
    by obs;
run;
